# -*- coding: utf-8 -*-
"""Categorical_Data.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qZRN3QralX2zmt_3ev3uVYzZhY8RO_gs
"""

import pandas as pd
# Small example frame: three rows with color, size, price and class columns.
df = pd.DataFrame(
    data=[
        ['green', 'M', 10.1, 'class2'],
        ['red', 'L', 13.5, 'class1'],
        ['blue', 'XL', 15.3, 'class2'],
    ],
    columns=['color', 'size', 'price', 'class'],
)
df  # bare expression: notebook display only, no effect when run as a script

# Replace each size string with its integer code.
size_mapping = dict(XL=3, L=2, M=1)
df['size'] = df['size'].map(size_mapping)
df

# Reverse lookup (integer code -> size string); mapping it over the encoded
# column yields the original size strings without modifying df.
inv_size_mapping = dict(zip(size_mapping.values(), size_mapping.keys()))
df['size'].map(inv_size_mapping)

import numpy as np

# Number the distinct class strings in the order np.unique returns them.
class_mapping = dict(map(reversed, enumerate(np.unique(df['class']))))
class_mapping

# Store the integer-encoded class in a new 'classlabel' column.
df['classlabel'] = df['class'].map(class_mapping)
df

# Invert the class mapping and turn the integer codes in 'classlabel' back
# into the class strings.
inv_class_mapping = dict(zip(class_mapping.values(), class_mapping.keys()))
df['classlabel'] = df['classlabel'].map(inv_class_mapping)
df

from sklearn.preprocessing import LabelEncoder

# Fit a label encoder on the 'classlabel' column and encode it as integers.
class_le = LabelEncoder()
class_le.fit(df['classlabel'].values)
y = class_le.transform(df['classlabel'].values)
y

# Round-trip: map the integer codes back to the original labels.
class_le.inverse_transform(y)

# Feature matrix as a NumPy array; column 0 (color) is overwritten in place
# with the integer codes produced by a second label encoder.
X = df[['color', 'size', 'price']].values
color_le = LabelEncoder().fit(X[:, 0])
X[:, 0] = color_le.transform(X[:, 0])
X

from sklearn.preprocessing import OneHotEncoder

# Rebuild the feature matrix from df so column 0 holds the color strings again.
X = df[['color', 'size', 'price']].values

# One-hot encode the color column alone; X[:, [0]] keeps it two-dimensional.
color_ohe = OneHotEncoder()
color_ohe.fit_transform(X[:, [0]]).toarray()

from sklearn.compose import ColumnTransformer

X = df[['color', 'size', 'price']].values

# One-hot encode column 0 and pass columns 1 and 2 through untouched.
c_transf = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), [0]),
        ('nothing', 'passthrough', [1, 2]),
    ],
)
c_transf.fit_transform(X).astype(float)

# pandas alternative: build dummy columns from the price/color/size columns.
pd.get_dummies(data=df.loc[:, ['price', 'color', 'size']])

# Same call, but dropping the first dummy level of each encoded column.
pd.get_dummies(
    data=df.loc[:, ['price', 'color', 'size']],
    drop_first=True,
)

# scikit-learn counterpart of drop_first: the encoder drops the first
# category of column 0, while columns 1 and 2 are passed through.
color_ohe = OneHotEncoder(drop='first', categories='auto')
c_transf = ColumnTransformer(
    transformers=[
        ('onehot', color_ohe, [0]),
        ('nothing', 'passthrough', [1, 2]),
    ],
)
c_transf.fit_transform(X).astype(float)